Preregistration - Main Analysis

Author

Isaac Chen

Published

December 22, 2025

Set up

# Load packages
library(here)
library(tidyverse)
library(interactions)
library(effects)

# NOTE(review): the previous `rm(list = ls())` was removed — clearing the
# user's global environment from a script is a documented anti-pattern
# (tidyverse style guide / lintr); a fresh session should be used instead.

# Set input files.
# here() already returns a project-rooted absolute path, so the file path
# is assembled with file.path() rather than passing the absolute directory
# back into here() (which would prepend the project root a second time).
merg_dir <- here("data", "phenotype", "merged")
participants_rds <- file.path(merg_dir, "preregistration_participants.rds")

# Read in data
participants_df <- readRDS(participants_rds)

Statistical Models

Three-way interactions

# Model 1: three-way interaction of reading skill (rs), fluid
# intelligence (fi), and age predicting TP connectivity
lm1 <- lm(
  formula = postC_TP ~ rs * fi * age,
  data = participants_df
)

summary(lm1)

Call:
lm(formula = postC_TP ~ rs * fi * age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8208 -0.7172 -0.1592  0.6233  2.9190 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.816e+01  1.847e+01  -0.983    0.326
rs           2.075e-01  1.829e-01   1.134    0.258
fi           1.421e-01  1.728e-01   0.823    0.411
age          1.410e+00  1.579e+00   0.893    0.373
rs:fi       -1.485e-03  1.687e-03  -0.880    0.379
rs:age      -1.453e-02  1.569e-02  -0.926    0.355
fi:age      -9.854e-03  1.495e-02  -0.659    0.510
rs:fi:age    1.034e-04  1.462e-04   0.707    0.480

Residual standard error: 0.9254 on 367 degrees of freedom
Multiple R-squared:  0.02331,   Adjusted R-squared:  0.004684 
F-statistic: 1.251 on 7 and 367 DF,  p-value: 0.2737

# Model 2: three-way interaction of phonological awareness (pa), fluid
# intelligence (fi), and age predicting vOT connectivity
lm2 <- lm(
  formula = postC_vOT ~ pa * fi * age,
  data = participants_df
)

summary(lm2)

Call:
lm(formula = postC_vOT ~ pa * fi * age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9408 -0.9520 -0.1301  0.7319  7.7811 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)  8.2609236 13.7430567   0.601    0.548
pa          -0.7357772  1.4013639  -0.525    0.600
fi          -0.0609217  0.1351357  -0.451    0.652
age         -0.3662704  1.1670458  -0.314    0.754
pa:fi        0.0071180  0.0133612   0.533    0.595
pa:age       0.0655427  0.1191572   0.550    0.583
fi:age       0.0043890  0.0116460   0.377    0.706
pa:fi:age   -0.0005646  0.0011542  -0.489    0.625

Residual standard error: 1.46 on 367 degrees of freedom
Multiple R-squared:  0.05082,   Adjusted R-squared:  0.03272 
F-statistic: 2.807 on 7 and 367 DF,  p-value: 0.007377

# Model 3: three-way interaction of reading skill (rs), fluid
# intelligence (fi), and age predicting IFG connectivity
lm3 <- lm(
  formula = postC_IFG ~ rs * fi * age,
  data = participants_df
)

summary(lm3)

Call:
lm(formula = postC_IFG ~ rs * fi * age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.5483 -0.8977  0.0205  0.8576  3.6728 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.476e+01  2.728e+01  -1.274    0.203
rs           4.261e-01  2.702e-01   1.577    0.116
fi           2.554e-01  2.552e-01   1.001    0.317
age          3.317e+00  2.332e+00   1.422    0.156
rs:fi       -3.182e-03  2.491e-03  -1.277    0.202
rs:age      -3.677e-02  2.317e-02  -1.587    0.113
fi:age      -2.296e-02  2.207e-02  -1.040    0.299
rs:fi:age    2.837e-04  2.159e-04   1.314    0.190

Residual standard error: 1.367 on 367 degrees of freedom
Multiple R-squared:  0.09512,   Adjusted R-squared:  0.07786 
F-statistic: 5.511 on 7 and 367 DF,  p-value: 4.839e-06

# Model 4: three-way interaction of phonological awareness (pa), fluid
# intelligence (fi), and age predicting IFG connectivity
lm4 <- lm(
  formula = postC_IFG ~ pa * fi * age,
  data = participants_df
)

summary(lm4)

Call:
lm(formula = postC_IFG ~ pa * fi * age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.7103 -0.9442  0.0270  0.7908  3.7541 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) 18.449552  12.836837   1.437    0.152
pa          -1.106791   1.308958  -0.846    0.398
fi          -0.196688   0.126225  -1.558    0.120
age         -1.274162   1.090091  -1.169    0.243
pa:fi        0.013572   0.012480   1.087    0.278
pa:age       0.098866   0.111300   0.888    0.375
fi:age       0.016473   0.010878   1.514    0.131
pa:fi:age   -0.001149   0.001078  -1.066    0.287

Residual standard error: 1.364 on 367 degrees of freedom
Multiple R-squared:  0.09873,   Adjusted R-squared:  0.08154 
F-statistic: 5.743 on 7 and 367 DF,  p-value: 2.535e-06

Two-way interactions

# Model 5: reading skill (rs) by fluid intelligence (fi) interaction
# predicting TP connectivity, with age as a covariate
lm5 <- lm(
  formula = postC_TP ~ rs * fi + age,
  data = participants_df
)

summary(lm5)

Call:
lm(formula = postC_TP ~ rs * fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8060 -0.7047 -0.1290  0.6074  2.8957 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.6371535  3.0254661  -0.211    0.833
rs           0.0274541  0.0293125   0.937    0.350
fi           0.0162414  0.0281761   0.576    0.565
age          0.0050875  0.0266422   0.191    0.849
rs:fi       -0.0001765  0.0002718  -0.649    0.516

Residual standard error: 0.9257 on 370 degrees of freedom
Multiple R-squared:  0.01465,   Adjusted R-squared:  0.004002 
F-statistic: 1.376 on 4 and 370 DF,  p-value: 0.2418
# Interaction effect plot for Model 5.
# vary.lty = FALSE draws every moderator line solid, replacing the
# previous scale_linetype_manual() override that clashed with the
# linetype scale interact_plot() adds itself (ggplot2 emitted a
# "Scale for linetype is already present" message).
lm5_effect <- interact_plot(
  model = lm5, 
  pred = rs, 
  modx = fi, 
  modx.values = c(70, 85, 100, 115, 130),
  main.title = "Model 5: TP ~ RS + FI + Age + (RS * FI)",
  legend.main = "Fluid Intelligence",
  plot.points = TRUE,
  colors = c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641"),
  point.alpha = 0.8,
  vary.lty = FALSE
)

fig_lm5_effect <- lm5_effect +
  scale_x_continuous(limits = c(55, 145), breaks = seq(55, 145, 10)) + 
  scale_y_continuous(limits = c(0, 5), breaks = seq(0, 5, 1)) + 
  labs(
    x = "Reading Skill", 
    y = "Neural Basis of Phonological Processing (TP)"
  ) + 
  theme_bw()

print(fig_lm5_effect)

# Model 6: phonological awareness (pa) by fluid intelligence (fi)
# interaction predicting vOT connectivity, with age as a covariate
lm6 <- lm(
  formula = postC_vOT ~ pa * fi + age,
  data = participants_df
)

summary(lm6)

Call:
lm(formula = postC_vOT ~ pa * fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9700 -0.9499 -0.1133  0.7301  7.7663 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.0808880  2.2149516   0.939 0.348101    
pa           0.0459044  0.2183460   0.210 0.833598    
fi          -0.0081904  0.0213102  -0.384 0.700946    
age          0.1477063  0.0422976   3.492 0.000537 ***
pa:fi        0.0003966  0.0020782   0.191 0.848763    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.455 on 370 degrees of freedom
Multiple R-squared:  0.04958,   Adjusted R-squared:  0.0393 
F-statistic: 4.825 on 4 and 370 DF,  p-value: 0.0008353
# Interaction effect plot for Model 6.
# vary.lty = FALSE draws every moderator line solid, replacing the
# previous scale_linetype_manual() override that clashed with the
# linetype scale interact_plot() adds itself (ggplot2 emitted a
# "Scale for linetype is already present" message).
lm6_effect <- interact_plot(
  model = lm6, 
  pred = pa, 
  modx = fi, 
  modx.values = c(70, 85, 100, 115, 130),
  main.title = "Model 6: vOT ~ PA + FI + Age + (PA * FI)",
  legend.main = "Fluid Intelligence",
  plot.points = TRUE,
  colors = c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641"),
  point.alpha = 0.8,
  vary.lty = FALSE
)

fig_lm6_effect <- lm6_effect +
  scale_x_continuous(limits = c(2, 19), breaks = seq(2, 19, 2)) + 
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 2)) +
  labs(
    x = "Phonological Awareness", 
    y = "Neural Basis of Orthographical Processing (vOT)"
  ) + 
  theme_bw()

print(fig_lm6_effect)

# Model 7: reading skill (rs) by fluid intelligence (fi) interaction
# predicting IFG connectivity, with age as a covariate
lm7 <- lm(
  formula = postC_IFG ~ rs * fi + age,
  data = participants_df
)

summary(lm7)

Call:
lm(formula = postC_IFG ~ rs * fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.6235 -0.9319  0.0079  0.8642  4.1131 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.2586711  4.4934053   0.503    0.615    
rs          -0.0078712  0.0435348  -0.181    0.857    
fi          -0.0194869  0.0418470  -0.466    0.642    
age          0.2057261  0.0395689   5.199 3.32e-07 ***
rs:fi        0.0001764  0.0004036   0.437    0.662    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.375 on 370 degrees of freedom
Multiple R-squared:  0.07668,   Adjusted R-squared:  0.0667 
F-statistic: 7.682 on 4 and 370 DF,  p-value: 5.906e-06
# Interaction effect plot for Model 7.
# vary.lty = FALSE draws every moderator line solid, replacing the
# previous scale_linetype_manual() override that clashed with the
# linetype scale interact_plot() adds itself (ggplot2 emitted a
# "Scale for linetype is already present" message).
lm7_effect <- interact_plot(
  model = lm7, 
  pred = rs, 
  modx = fi, 
  modx.values = c(70, 85, 100, 115, 130),
  main.title = "Model 7: IFG ~ RS + FI + Age + (RS * FI)",
  legend.main = "Fluid Intelligence",
  plot.points = TRUE,
  colors = c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641"),
  point.alpha = 0.8,
  vary.lty = FALSE
)

fig_lm7_effect <- lm7_effect +
  scale_x_continuous(limits = c(55, 145), breaks = seq(55, 145, 10)) +
  scale_y_continuous(limits = c(0, 8.5), breaks = seq(0, 8, 2)) +
  labs(
    x = "Reading Skill", 
    y = "Neural Basis of Lexical Processing (IFG)"
  ) + 
  theme_bw()

print(fig_lm7_effect)

# Model 8: phonological awareness (pa) by fluid intelligence (fi)
# interaction predicting IFG connectivity, with age as a covariate
lm8 <- lm(
  formula = postC_IFG ~ pa * fi + age,
  data = participants_df
)

summary(lm8)

Call:
lm(formula = postC_IFG ~ pa * fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.7276 -0.9506  0.0004  0.8417  3.9236 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.5824645  2.0808128   0.761    0.447    
pa          -0.0021519  0.2051229  -0.010    0.992    
fi          -0.0109773  0.0200197  -0.548    0.584    
age          0.2169956  0.0397360   5.461 8.71e-08 ***
pa:fi        0.0007597  0.0019523   0.389    0.697    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.367 on 370 degrees of freedom
Multiple R-squared:  0.08713,   Adjusted R-squared:  0.07726 
F-statistic: 8.828 on 4 and 370 DF,  p-value: 8.118e-07
# Interaction effect plot for Model 8.
# vary.lty = FALSE draws every moderator line solid, replacing the
# previous scale_linetype_manual() override that clashed with the
# linetype scale interact_plot() adds itself (ggplot2 emitted a
# "Scale for linetype is already present" message).
lm8_effect <- interact_plot(
  model = lm8, 
  pred = pa, 
  modx = fi, 
  modx.values = c(70, 85, 100, 115, 130),
  main.title = "Model 8: IFG ~ PA + FI + Age + (PA * FI)",
  legend.main = "Fluid Intelligence",
  plot.points = TRUE,
  colors = c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641"),
  point.alpha = 0.8,
  vary.lty = FALSE
)

fig_lm8_effect <- lm8_effect +
  scale_x_continuous(limits = c(2, 19), breaks = seq(2, 19, 2)) + 
  scale_y_continuous(limits = c(0, 8.5), breaks = seq(0, 8, 2)) + 
  labs(
    x = "Phonological Awareness", 
    y = "Neural Basis of Lexical Processing (IFG)"
  ) + 
  theme_bw()

print(fig_lm8_effect)

Main effects

# Model 9: main effects of reading skill (rs), fluid intelligence (fi),
# and age on TP connectivity (no interaction terms)
lm9 <- lm(
  formula = postC_TP ~ rs + fi + age,
  data = participants_df
)

summary(lm9)

Call:
lm(formula = postC_TP ~ rs + fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7935 -0.7017 -0.1285  0.6000  2.8353 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept)  1.289278   0.594501   2.169   0.0307 *
rs           0.008584   0.003860   2.224   0.0268 *
fi          -0.001885   0.003848  -0.490   0.6245  
age          0.005009   0.026621   0.188   0.8509  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.925 on 371 degrees of freedom
Multiple R-squared:  0.01353,   Adjusted R-squared:  0.005554 
F-statistic: 1.696 on 3 and 371 DF,  p-value: 0.1674
# Scatter plot: reading skill vs. TP connectivity, points coloured by
# fluid intelligence, with an unadjusted linear trend line overlaid
fi_palette <- c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641")

fig_lm9_scatter <- ggplot(participants_df, aes(x = rs, y = postC_TP)) +
  geom_point(aes(color = fi)) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "steelblue") +
  scale_color_gradientn(colors = fi_palette) +
  scale_x_continuous(limits = c(55, 145), breaks = seq(55, 145, 10)) +
  scale_y_continuous(limits = c(0, 5), breaks = seq(0, 5, 1)) +
  labs(
    title = "Model 9: TP ~ RS + FI + Age",
    x = "Reading Skill",
    y = "Neural Basis of Phonological Processing (TP)",
    color = "Fluid Intelligence"
  ) +
  theme_bw()

print(fig_lm9_scatter)

# Model 10: main effects of phonological awareness (pa), fluid
# intelligence (fi), and age on vOT connectivity (no interaction terms)
lm10 <- lm(
  formula = postC_vOT ~ pa + fi + age,
  data = participants_df
)

summary(lm10)

Call:
lm(formula = postC_vOT ~ pa + fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9709 -0.9748 -0.1021  0.7325  7.7578 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.690436   0.847191   1.995 0.046736 *  
pa           0.087203   0.028949   3.012 0.002771 ** 
fi          -0.004293   0.006078  -0.706 0.480396    
age          0.146924   0.042044   3.495 0.000532 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.453 on 371 degrees of freedom
Multiple R-squared:  0.04949,   Adjusted R-squared:  0.0418 
F-statistic: 6.438 on 3 and 371 DF,  p-value: 0.000293
# Scatter plot: phonological awareness vs. vOT connectivity, points
# coloured by fluid intelligence, with an unadjusted linear trend line
fi_palette <- c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641")

fig_lm10_scatter <- ggplot(participants_df, aes(x = pa, y = postC_vOT)) +
  geom_point(aes(color = fi)) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "steelblue") +
  scale_color_gradientn(colors = fi_palette) +
  scale_x_continuous(limits = c(2, 19), breaks = seq(2, 19, 2)) +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 2)) +
  labs(
    title = "Model 10: vOT ~ PA + FI + Age",
    x = "Phonological Awareness",
    y = "Neural Basis of Orthographical Processing (vOT)",
    color = "Fluid Intelligence"
  ) +
  theme_bw()

print(fig_lm10_scatter)

# Model 11: main effects of reading skill (rs), fluid intelligence (fi),
# and age on IFG connectivity (no interaction terms)
lm11 <- lm(
  formula = postC_IFG ~ rs + fi + age,
  data = participants_df
)

summary(lm11)

Call:
lm(formula = postC_IFG ~ rs + fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.6590 -0.9280  0.0021  0.8680  4.0451 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.332909   0.882675   0.377   0.7063    
rs           0.010992   0.005731   1.918   0.0559 .  
fi          -0.001367   0.005714  -0.239   0.8111    
age          0.205805   0.039525   5.207 3.19e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.373 on 371 degrees of freedom
Multiple R-squared:  0.07621,   Adjusted R-squared:  0.06874 
F-statistic:  10.2 on 3 and 371 DF,  p-value: 1.804e-06
# Scatter plot: reading skill vs. IFG connectivity, points coloured by
# fluid intelligence, with an unadjusted linear trend line overlaid
fi_palette <- c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641")

fig_lm11_scatter <- ggplot(participants_df, aes(x = rs, y = postC_IFG)) +
  geom_point(aes(color = fi)) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "steelblue") +
  scale_color_gradientn(colors = fi_palette) +
  scale_x_continuous(limits = c(55, 145), breaks = seq(55, 145, 10)) +
  scale_y_continuous(limits = c(0, 8.5), breaks = seq(0, 8, 2)) +
  labs(
    title = "Model 11: IFG ~ RS + FI + Age",
    x = "Reading Skill",
    y = "Neural Basis of Lexical Processing (IFG)",
    color = "Fluid Intelligence"
  ) +
  theme_bw()

print(fig_lm11_scatter)

# Model 12: main effects of phonological awareness (pa), fluid
# intelligence (fi), and age on IFG connectivity (no interaction terms)
lm12 <- lm(
  formula = postC_IFG ~ pa + fi + age,
  data = participants_df
)

summary(lm12)

Call:
lm(formula = postC_IFG ~ pa + fi + age, data = participants_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.7451 -0.9603 -0.0205  0.8628  3.8340 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.834471   0.796008   1.048  0.29517    
pa           0.076964   0.027200   2.829  0.00492 ** 
fi          -0.003511   0.005710  -0.615  0.53902    
age          0.215497   0.039504   5.455 8.96e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.365 on 371 degrees of freedom
Multiple R-squared:  0.08675,   Adjusted R-squared:  0.07937 
F-statistic: 11.75 on 3 and 371 DF,  p-value: 2.278e-07
# Scatter plot: phonological awareness vs. IFG connectivity, points
# coloured by fluid intelligence, with an unadjusted linear trend line
fi_palette <- c("#d7191c", "#fdae61", "#fee08b", "#a6d96a", "#1a9641")

fig_lm12_scatter <- ggplot(participants_df, aes(x = pa, y = postC_IFG)) +
  geom_point(aes(color = fi)) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "steelblue") +
  scale_color_gradientn(colors = fi_palette) +
  scale_x_continuous(limits = c(2, 19), breaks = seq(2, 19, 2)) +
  scale_y_continuous(limits = c(0, 8.5), breaks = seq(0, 8, 2)) +
  labs(
    title = "Model 12: IFG ~ PA + FI + Age",
    x = "Phonological Awareness",
    y = "Neural Basis of Lexical Processing (IFG)",
    color = "Fluid Intelligence"
  ) +
  theme_bw()

print(fig_lm12_scatter)